5 - Optimization

In this lecture note, we cover the basics of optimization in high dimensions. In particular, we introduce derivative tests for multivariate functions, review properties such as smoothness and convexity, and analyze standard methods such as gradient descent and the stochastic gradient method.

5.1 Unconstrained Optimization and High-Dimensional Derivatives

We will write our optimization problems in the form

$$ \min_{\mathbf{w} \in \mathbb{R}^d} f(\mathbf{w}). $$

This statement should be interpreted as wishing to find the minimum value of a multivariate function $f$ over all possible vectors of variables $\mathbf{w}.$ We will refer to the function $f$ as the objective or loss function. For the problems in data science we consider, $\mathbf{w}$ will typically correspond to a parameter (or weight) vector that determines the model that we use, while $f$ will typically have the interpretation of the average error (or mismatch) between the learning model and the data.

To state the algorithms we will use and even to check if any vector is a (local) minimum of $f$, we need to know how to work with high-dimensional derivatives of $f$. Fortunately, these are not hard to define and we will only look at the first and the second derivative. The first derivative of $f$ is a $d$-dimensional vector called the gradient and is denoted by $\nabla f(\mathbf{w})$. It is a vector that simply stacks all partial derivatives of $f$:

$$ \nabla f(\mathbf{w}) = \begin{bmatrix}\frac{\partial f}{\partial w_1}(\mathbf{w})\\ \frac{\partial f}{\partial w_2}(\mathbf{w})\\ \vdots\\ \frac{\partial f}{\partial w_d}(\mathbf{w}) \end{bmatrix}. $$

Similarly, the second-order derivative is a matrix that we call the Hessian matrix and denote by $\nabla^2 f(\mathbf{w})$. It is defined by

$$ \nabla^2 f(\mathbf{w}) = \begin{bmatrix}\frac{\partial^2 f}{\partial w_1^2}(\mathbf{w}) & \frac{\partial^2 f}{\partial w_1 \partial w_2}(\mathbf{w}) &\dots & \frac{\partial^2 f}{\partial w_1 \partial w_d}(\mathbf{w})\\ \frac{\partial^2 f}{\partial w_2 \partial w_1}(\mathbf{w}) & \frac{\partial^2 f}{\partial w_2^2}(\mathbf{w}) & \dots & \frac{\partial^2 f}{\partial w_2 \partial w_d}(\mathbf{w})\\ \vdots & \vdots & \ddots & \vdots\\ \frac{\partial^2 f}{\partial w_d \partial w_1}(\mathbf{w}) & \frac{\partial^2 f}{\partial w_d \partial w_2}(\mathbf{w}) & \dots & \frac{\partial^2 f}{\partial w_d^2}(\mathbf{w}) \end{bmatrix}. $$

In this class, whenever we work with a function that has a Hessian, it will be a symmetric matrix, meaning that the entries $ij$ and $ji$ are the same for any $i, j \in \{1, 2, \dots, d\}.$ For this property to hold, it is generally required that the second partial derivatives are continuous.

5.2 Local and Global Minima, High-Dimensional Derivative Test

Similar to the univariate case, we say that some vector $\mathbf{w}_*$ is a local minimizer (or local minimum) of $f$ if there exists a radius $r > 0$ such that $f(\mathbf{w}_*) \leq f(\mathbf{w})$ for all $\mathbf{w}$ with $\|\mathbf{w} - \mathbf{w}_*\|_2 \leq r$, and a global minimizer (or global minimum) of $f$ if $f(\mathbf{w}_*) \leq f(\mathbf{w})$ for all $\mathbf{w} \in \mathbb{R}^d.$

Sometimes even finding an approximate local minimizer of $f$ is too much to ask for: we very quickly run into computational intractability issues. However, under mild assumptions about $f$, we can guarantee finding a vector $\mathbf{w}$ with small gradient norm $\|\nabla f(\mathbf{w})\|_2.$ We refer to points with zero gradient as stationary points. Stationary points can be local maxima, local minima, or neither. Example illustrations are provided below.

You will recall from your calculus classes (and maybe some of our past lectures), that every local minimum $w_*$ of a univariate function $f$ needs to satisfy $f'(w_*) = 0$ and $f''(w_*) \geq 0$ (necessary condition). On the other hand, if a point $w_*$ is such that $f'(w_*) = 0$ and $f''(w_*) > 0$, then $w_*$ must be a (strict) local minimizer of $f$ (sufficient condition). There is a similar derivative test that can be applied to multivariate functions, but now because the second derivative is a matrix, we need to be careful with what we mean by the second derivative being non-negative or positive (for the first derivative, the condition is simply $\nabla f(\mathbf{w}_*) = \mathbf{0}$). It turns out that the 'correct' notion of non-negativity for the Hessian matrix is that it is positive semi-definite, which is something you may have seen in your linear algebra classes. We will say what this means more precisely below.

Necessary Conditions: If $\mathbf{w}_* \in \mathbb{R}^d$ is a local minimizer of a (twice continuously differentiable) function $f$, then it must satisfy:

$$ \nabla f(\mathbf{w}_*) = \mathbf{0} \;\; \text{ and } \;\; \mathbf{v}^\top \nabla^2 f(\mathbf{w}_*) \mathbf{v} \geq 0, \; \forall \mathbf{v} \in \mathbb{R}^d. $$

In other words, the gradient at $\mathbf{w}_*$ must be the zero vector and the Hessian at $\mathbf{w}_*$ must be a positive semi-definite matrix.

Sufficient Conditions: If $\mathbf{w}_* \in \mathbb{R}^d$ satisfies

$$ \nabla f(\mathbf{w}_*) = \mathbf{0} \;\; \text{ and } \;\; \mathbf{v}^\top \nabla^2 f(\mathbf{w}_*) \mathbf{v} > 0, \; \forall \mathbf{v} \in \mathbb{R}^d \; \text{ s.t. }\; \mathbf{v} \neq \mathbf{0} , $$

then $\mathbf{w}_*$ must be a (strict) local minimizer of $f.$

In other words, if the gradient of $f$ at $\mathbf{w}_*$ is the zero vector and the Hessian of $f$ at $\mathbf{w}_*$ is positive definite, then $\mathbf{w}_*$ must be a (strict) local minimizer of $f.$

It can be quite difficult to determine by hand whether a symmetric $d \times d$ matrix is positive (semi)definite. However, an equivalent characterization of such matrices is via eigenvalues: a matrix is positive semi-definite if and only if its smallest eigenvalue is non-negative. Similarly, a matrix is positive definite if and only if its smallest eigenvalue is (strictly) positive. There are multiple packages & functions in Python (like numpy.linalg.eig and scipy.sparse.linalg.eigs) that can compute the minimum eigenvalue (or even the entire eigen decomposition) of a symmetric matrix, so if you have a candidate vector $\mathbf{w}_*$ for a function $f$ that is twice (continuously) differentiable, you can perform the derivative test.
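To make the test concrete, here is a minimal sketch (the function below is a hypothetical example chosen for illustration, not one from the lecture) that checks both conditions at a candidate point using `numpy.linalg.eigvalsh`, which returns the eigenvalues of a symmetric matrix in ascending order:

```python
import numpy as np

# A minimal sketch of the eigenvalue-based derivative test, applied to the
# hypothetical function f(w) = w_1^4 + w_1^2 + (w_1 - w_2)^2 (chosen only for
# illustration) at the candidate point w_* = (0, 0).
def grad_f(w):
    return np.array([4 * w[0]**3 + 2 * w[0] + 2 * (w[0] - w[1]),
                     -2 * (w[0] - w[1])])

def hess_f(w):
    return np.array([[12 * w[0]**2 + 4.0, -2.0],
                     [-2.0,               2.0]])

w_star = np.zeros(2)
g = grad_f(w_star)
eigenvalues = np.linalg.eigvalsh(hess_f(w_star))  # eigenvalues in ascending order

print("gradient at w_*:", g)                      # zero vector => stationary point
print("smallest Hessian eigenvalue:", eigenvalues[0])
if np.allclose(g, 0.0) and eigenvalues[0] > 0:
    print("w_* passes the sufficient condition: it is a (strict) local minimizer")
```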

There is also a simple class of matrices that are guaranteed to be positive semi-definite: matrices $\mathbf{A}$ that can be written as $\mathbf{A} = \mathbf{B}^\top \mathbf{B}$ for some matrix $\mathbf{B}.$ In that case, for any vector $\mathbf{v}$ we have

$$ \mathbf{v}^\top \mathbf{A}\mathbf{v} = \mathbf{v}^\top \mathbf{B}^\top \mathbf{B}\mathbf{v} = (\mathbf{B}\mathbf{v})^\top \mathbf{B}\mathbf{v} = \|\mathbf{B}\mathbf{v}\|_2^2 \geq 0. $$

An example of a matrix that we can write in this form is the covariance matrix $\boldsymbol{\Sigma} = \mathbb{E}[(\mathbf{x} - \boldsymbol{\mu})(\mathbf{x} - \boldsymbol{\mu})^\top]$ of any random vector $\mathbf{x}$ with mean $\mathbb{E}[\mathbf{x}] = \boldsymbol{\mu}$.

Another simple example would be a diagonal matrix: a matrix whose only non-zero elements are on its diagonal. Then it is known that in this case the entries on the diagonal are the eigenvalues of the matrix (with standard basis vectors as the eigenvectors), and we can simply check if the matrix is positive (semi)definite by looking at whether all the eigenvalues are positive (or non-negative).

5.3 Convexity

You may be wondering if there are functions for which we can guarantee that any point that is a local minimum is also a global minimum. It turns out that there is a very important class of functions that satisfies such a property, called convex functions (they satisfy even more: any stationary point is a global minimum). This is not the largest class of functions with the property that all local minima are global, but it is perhaps the most well-studied one.

We say that a function $f$ is convex if for any two vectors $\mathbf{w}_1$ and $\mathbf{w}_2$ and any $\alpha \in (0, 1)$ we have

$$ f(\alpha \mathbf{w}_1 + (1-\alpha)\mathbf{w}_2) \leq \alpha f(\mathbf{w}_1) + (1 - \alpha) f(\mathbf{w}_2). $$

An immediate consequence of convexity is the very useful Jensen's inequality, which states that if $\mathbf{w}$ is a random vector drawn from some distribution $\mathcal{D}$ and $f$ is a convex function, then

$$ f(\mathbb{E}_{\mathbf{w}\sim \mathcal{D}}[\mathbf{w}]) \leq \mathbb{E}_{\mathbf{w}\sim \mathcal{D}}[f(\mathbf{w})]. $$

The code below provides an illustration of Jensen's inequality.
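(The original snippet is not reproduced here; the following is a minimal stand-in that checks the inequality numerically, with the convex function $f(\mathbf{w}) = \|\mathbf{w}\|_2^2$ and a standard normal distribution chosen only for illustration.)

```python
import numpy as np

# A minimal numerical check of Jensen's inequality. Illustrative assumptions:
# f(w) = ||w||_2^2 (convex) and w ~ N(0, I_3); expectations are approximated
# by sample averages.
rng = np.random.default_rng(0)
f = lambda w: np.sum(w**2, axis=-1)

samples = rng.standard_normal(size=(100_000, 3))   # draws of the random vector w
f_of_mean = f(samples.mean(axis=0))                 # f(E[w]), approximately f(0) = 0
mean_of_f = f(samples).mean()                       # E[f(w)], approximately 3

print(f"f(E[w]) = {f_of_mean:.4f}  <=  E[f(w)] = {mean_of_f:.4f}")
```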

You may have also heard of concave functions. These are simply the functions $g$ such that $f(\cdot) = -g(\cdot)$ is convex.

In this course, we will mainly be concerned with functions that are differentiable (in fact, almost any function you are likely to encounter will be differentiable almost everywhere, so we are not losing much by assuming this). Convex functions that are differentiable (meaning their gradient exists) can be equivalently defined as functions that satisfy the following for all vectors $\mathbf{w}_1, \mathbf{w}_2:$

$$ f(\mathbf{w}_1) \geq f(\mathbf{w}_2) + \nabla f(\mathbf{w}_2)^\top (\mathbf{w}_1 - \mathbf{w}_2). $$

Geometrically, this means that the tangent to a convex function (its linear approximation) lies below it everywhere. In one dimension, this is illustrated by the Python code snippet below. In two dimensions, the linear approximation would be a (lower-bounding) plane touching (but otherwise not intersecting) the function; in higher dimensions these are called lower-bounding hyperplanes. They correspond to the right-hand side of the inequality above, i.e., the function $f(\mathbf{w}_2) + \nabla f(\mathbf{w}_2)^\top (\mathbf{w}_1 - \mathbf{w}_2)$ that treats $\mathbf{w}_2$ as a fixed vector and $\mathbf{w}_1$ as its vector of variables. So for any fixed $\mathbf{w}_2,$ a convex function $f$ lies above this linear approximation at every point $\mathbf{w}_1.$
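(A minimal stand-in for that snippet, assuming the simple convex function $f(w) = w^2$ and the point $w_2 = 1$ for illustration:)

```python
import numpy as np
import matplotlib.pyplot as plt

# A minimal stand-in illustration: for the convex function f(w) = w^2, the
# linear approximation around w_2 = 1 lies below the function everywhere.
f = lambda w: w**2
grad_f = lambda w: 2 * w

w2 = 1.0
w = np.linspace(-2.0, 3.0, 400)
linear_approx = f(w2) + grad_f(w2) * (w - w2)   # f(w_2) + f'(w_2)(w - w_2)

print("linear approximation <= f everywhere:",
      bool(np.all(linear_approx <= f(w) + 1e-12)))

plt.plot(w, f(w), label="f(w) = w^2")
plt.plot(w, linear_approx, "--", label="linear approximation at w_2 = 1")
plt.legend()
plt.show()
```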

If a function is further twice continuously differentiable (meaning that the Hessian exists everywhere), then it is convex if and only if its Hessian $\nabla^2 f(\mathbf{w})$ is positive semi-definite at every vector $\mathbf{w}$ (compare this to the derivative test we discussed earlier).

5.4 Some Regularity Properties of Objective Functions

In general, convex or not, we do not know how to minimize arbitrary functions (and there are computational hardness results that prevent us from doing so) $-$ we always need to make some regularity assumptions about $f$. One such regularity assumption that is commonly used, and satisfied in problems such as linear regression and logistic regression, is called smoothness. A function is said to be smooth if its gradient is Lipschitz continuous, meaning that there exists some positive constant $L$ such that for any two vectors $\mathbf{w}_1, \mathbf{w}_2$ we have

$$ \|\nabla f(\mathbf{w}_1) - \nabla f(\mathbf{w}_2)\|_2 \leq L \|\mathbf{w}_1 - \mathbf{w}_2\|_2. $$

In other words, if we change the vector $\mathbf{w}$ by a little bit, the gradient of the function cannot change too much. If we can write the above inequality for some concrete value $L$, we will also say that the function is $L$-smooth. All smooth functions will satisfy the following inequality, which we will end up using in the analysis of algorithms: for all $\mathbf{w}_1, \mathbf{w}_2$,

$$ f(\mathbf{w}_1) \leq f(\mathbf{w}_2) + \nabla f(\mathbf{w}_2)^\top (\mathbf{w}_1 - \mathbf{w}_2) + \frac{L}{2}\|\mathbf{w}_1 - \mathbf{w}_2 \|_2^2. $$

All the functions we have seen in Python illustrations so far are smooth.

An example of a function that is continuous but not smooth is provided below. You can easily check that at the "kinks" where the different linear pieces join (at $x = 0$ and $x = 2$), the derivative is discontinuous.
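(The lecture's own example is not reproduced here; as a hypothetical stand-in with kinks at the same points, consider $f(x) = |x| + |x - 2|$:)

```python
import numpy as np
import matplotlib.pyplot as plt

# A hypothetical stand-in example: f(x) = |x| + |x - 2| is Lipschitz continuous
# (bounded slope) but not smooth -- its derivative jumps at x = 0 and x = 2.
f = lambda x: np.abs(x) + np.abs(x - 2)
df = lambda x: np.sign(x) + np.sign(x - 2)   # derivative away from the kinks

x = np.linspace(-2, 4, 600)
fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(10, 3))
ax1.plot(x, f(x)); ax1.set_title("piecewise-linear f (kinks at 0 and 2)")
ax2.plot(x, df(x)); ax2.set_title("derivative of f (discontinuous)")
plt.show()
```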

We will sometimes assume that, instead of being smooth, a function has a bounded gradient norm at every point, meaning that there exists some positive number $M$ such that $\|\nabla f(\mathbf{w})\|_2 \leq M$ for all $\mathbf{w} \in \mathbb{R}^d.$ This property is satisfied by functions that are Lipschitz continuous, meaning that for any two vectors $\mathbf{w}_1$ and $\mathbf{w}_2$ we have

$$ |f(\mathbf{w}_1) - f(\mathbf{w}_2)| \leq M\|\mathbf{w}_1 - \mathbf{w}_2\|_2. $$

The piecewise linear function illustrated above is Lipschitz continuous (has bounded slope).

5.5 Gradient Descent

Gradient descent is one of the most basic optimization algorithms and is also quite natural (so much so that it was discovered by Cauchy in 1847; see also https://www.math.uni-bielefeld.de/documenta/vol-ismp/40_lemarechal-claude.pdf): the idea is to move in the direction opposite to the gradient, which you can think of as going down a hill. Specifically, we start from an arbitrary vector $\mathbf{w}_0$ (e.g., $\mathbf{w}_0 = \mathbf{0}$) and update the (candidate) solution vector using the negative direction of the gradient at each iteration $t \geq 0$:

$$ \mathbf{w}_{t+1} = \mathbf{w}_t - \eta_t \nabla f(\mathbf{w}_t). $$

This is an iterative method and we usually refer to the running solution vectors $\mathbf{w}_t$ as the "iterates" of the algorithm.
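As a minimal sketch, the update rule can be implemented in a few lines; the quadratic objective below is an illustrative assumption, not a function from the lecture:

```python
import numpy as np

def gradient_descent(grad_f, w0, eta, num_iters):
    """Run w_{t+1} = w_t - eta * grad_f(w_t) and return all iterates."""
    w = np.asarray(w0, dtype=float)
    iterates = [w.copy()]
    for _ in range(num_iters):
        w = w - eta * grad_f(w)
        iterates.append(w.copy())
    return iterates

# Illustrative objective: f(w) = 0.5 * ||A w - b||_2^2 with gradient A^T (A w - b).
A = np.array([[2.0, 0.0], [0.0, 1.0]])
b = np.array([1.0, -1.0])
grad_f = lambda w: A.T @ (A @ w - b)

L = np.linalg.eigvalsh(A.T @ A).max()       # smoothness constant of this objective
iterates = gradient_descent(grad_f, w0=np.zeros(2), eta=1.0 / L, num_iters=100)
print("final iterate:", iterates[-1])       # approaches the solution of A w = b
```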

Very little effort is needed to justify such an approach: if a function is sufficiently "regular" (for example, if it is smooth), we can guarantee a reduction in the function value. So this seems like a reasonable approach and one that would at least avoid local maxima (provided we did not start at one). However, unless the function is convex (or in some sense close to convex), there is no guarantee of landing at a local minimum: the best we can hope to converge to is a stationary point. We will formally argue this in what follows.

5.5.1 Sufficient Descent and Convergence to a Stationary Point

The basic version of the gradient descent method applies when the function is $L$-smooth for some constant $L > 0.$ In this case, the method is also called "steepest descent" and we can argue that if we choose the step size to be "not too large," then we are guaranteed to decrease the value of the function. Note that for this basic claim we do not require that the function is convex; only that it is smooth.

To formally argue this, all that we need is the inequality that holds for all $L$-smooth functions that we stated earlier:

$$ f(\mathbf{w}') \leq f(\mathbf{w}) + \nabla f(\mathbf{w})^\top(\mathbf{w}' - \mathbf{w}) + \frac{L}{2}\|\mathbf{w}' - \mathbf{w}\|_2^2, \;\;\; \forall \mathbf{w}, \mathbf{w}' \in \mathbb{R}^d. $$

In particular, plugging $\mathbf{w} = \mathbf{w}_t$ and $\mathbf{w}' = \mathbf{w}_{t+1} = \mathbf{w}_t - \eta_t \nabla f(\mathbf{w}_t)$ into the above inequality, we get that

$$ f(\mathbf{w}_{t+1}) \leq f(\mathbf{w}_t) + \nabla f(\mathbf{w}_t)^\top(-\eta_t \nabla f(\mathbf{w}_t)) + \frac{L}{2}\|-\eta_t \nabla f(\mathbf{w}_t)\|_2^2 = f(\mathbf{w}_t) - \Big(\eta_t - \frac{\eta_t^2 L}{2}\Big)\|\nabla f(\mathbf{w}_t)\|_2^2. $$

Thus, if we choose $\eta_t$ so that $\eta_t - \frac{\eta_t^2 L}{2} > 0$, which, solving for $\eta_t$, is the same as $0 < \eta_t < \frac{2}{L},$ we are guaranteed to decrease the function value (that is, get $f(\mathbf{w}_{t+1}) < f(\mathbf{w}_t)$) as long as $\nabla f(\mathbf{w}_t) \neq \mathbf{0}.$ But if the gradient is the zero vector, we have converged to a stationary point! Now, this point may or may not be a local minimum, but it turns out that under the assumptions we are making, this is the best we can hope to get, and in many applications having a point with zero (or small) gradient is good enough.

To be more specific, let us take the step size to be constant $\eta_t = \eta$ and such that $0 < \eta \leq \frac{1}{L}$ (for example, we can take $\eta = 1/L$). Then the inequality we have derived above leads to what is known as the "sufficient decrease" or "sufficient descent" property of gradient descent:

$$ f(\mathbf{w}_{t+1}) - f(\mathbf{w}_t) \leq - \Big(\eta - \frac{\eta^2 L}{2}\Big)\|\nabla f(\mathbf{w}_t)\|_2^2 \leq - \frac{\eta}{2}\|\nabla f(\mathbf{w}_t)\|_2^2, $$

where we used $\eta \leq \frac{1}{L}$ and so $\frac{\eta^2 L}{2} \leq \eta \cdot \frac{1}{L}\cdot \frac{L}{2} = \frac{\eta}{2}.$

The sufficient decrease property above can be used to argue how fast we can make the gradient small; i.e., to get a rate of convergence. We only need the mild assumption that the function $f$ is bounded below, meaning that there exists some $f_* > -\infty$ such that $f(\mathbf{w}) \geq f_*$ for all vectors $\mathbf{w} \in \mathbb{R}^d$. Combined with smoothness, this assumption guarantees that there exist points with arbitrarily small gradient norm (although an exact stationary point need not exist). Without such an assumption we cannot say much: consider the univariate function $f(w) = w;$ its derivative is equal to 1 everywhere, so it cannot be made arbitrarily small. This function is not bounded below $-$ it does not have a minimizer on the real line; if we were to try to minimize it, we would keep decreasing $w$ towards $-\infty$.

The sufficient decrease property we have derived for gradient descent holds no matter where we start from and for any iteration $t$. If we look at any iteration $t \geq 1$ and apply the sufficient decrease property recursively, we get

\begin{align*} f(\mathbf{w}_{t+1}) &\leq f(\mathbf{w}_t) - \frac{\eta}{2}\|\nabla f(\mathbf{w}_t)\|_2^2\\ &\leq f(\mathbf{w}_{t-1}) - \frac{\eta}{2}\|\nabla f(\mathbf{w}_{t-1})\|_2^2 - \frac{\eta}{2}\|\nabla f(\mathbf{w}_t)\|_2^2\\ &\;\vdots\\ &\leq f(\mathbf{w}_0) - \frac{\eta}{2}\|\nabla f(\mathbf{w}_0)\|_2^2 - \frac{\eta}{2}\|\nabla f(\mathbf{w}_1)\|_2^2 - \dots - \frac{\eta}{2}\|\nabla f(\mathbf{w}_t)\|_2^2. \end{align*}

Grouping all the gradient norm squared terms into a summation and rearranging the last inequality, we equivalently have:

$$ \sum_{k=0}^t \|\nabla f(\mathbf{w}_k)\|_2^2 \leq \frac{2}{\eta}(f(\mathbf{w}_0) - f(\mathbf{w}_{t+1})). $$

Since the function is bounded below, we also have $f(\mathbf{w}_{t+1}) \geq f_*$ and so

$$ \sum_{k=0}^t \|\nabla f(\mathbf{w}_k)\|_2^2 \leq \frac{2}{\eta}(f(\mathbf{w}_0) - f_*). $$

Now we have something that increases with $t$ on the left-hand side (all summation terms are non-negative, and strictly positive unless we are already at a stationary point), while the right-hand side is a fixed finite quantity, independent of $t$. This tells us that the gradient norm cannot stay large forever $-$ it must go to zero as $t$ tends to infinity. But we can say even more. If we divide both sides by $t+1$, on the left-hand side we get the average squared gradient norm, while the right-hand side approaches zero at rate $\frac{1}{t+1}:$

$$ \frac{1}{t+1}\sum_{k=0}^t \|\nabla f(\mathbf{w}_k)\|_2^2 \leq \frac{\frac{2}{\eta}(f(\mathbf{w}_0) - f_*)}{t+1}. $$

The gradient norms in the above inequality are just some numbers. The minimum of a set of numbers, as you know, cannot be larger than their average. So we can also write that

$$ \min_{0\leq k \leq t}\|\nabla f(\mathbf{w}_k)\|_2^2 \leq \frac{\frac{2}{\eta}(f(\mathbf{w}_0) - f_*)}{t+1}. $$

In other words, within the first $t$ iterations, the smallest gradient (the one with the smallest magnitude/norm) that we see is guaranteed to have squared norm at most $\frac{\frac{2}{\eta}(f(\mathbf{w}_0) - f_*)}{t+1}$. Put differently, if we want to find a solution point $\mathbf{w}$ such that $\|\nabla f(\mathbf{w})\|_2 \leq \epsilon$ for some target error $\epsilon > 0,$ then by running gradient descent and taking the vector $\mathbf{w}_k$, $0\leq k \leq t,$ with the minimum gradient norm, we are guaranteed to have $\|\nabla f(\mathbf{w}_k)\|_2 \leq \epsilon$ within $t = \lceil \frac{\frac{2}{\eta}(f(\mathbf{w}_0) - f_*)}{\epsilon^2} \rceil$ iterations.
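As a sanity check, the sketch below verifies this guarantee numerically on an illustrative $1$-smooth function, $f(w) = 1 - \cos(w)$, which is bounded below by $f_* = 0$ (these choices are assumptions made for the sketch, not examples from the lecture):

```python
import numpy as np

# Numerical check of the min-gradient-norm guarantee on f(w) = 1 - cos(w),
# which is 1-smooth and bounded below by f_* = 0 (illustrative assumptions).
f = lambda w: 1.0 - np.cos(w)
grad_f = lambda w: np.sin(w)

L, eta, f_star = 1.0, 1.0, 0.0           # step size eta = 1/L
w0 = 2.0
w, grad_sq = w0, []
for _ in range(50):
    grad_sq.append(grad_f(w) ** 2)        # record ||grad f(w_t)||^2 before the update
    w = w - eta * grad_f(w)

t = len(grad_sq) - 1
bound = (2.0 / eta) * (f(w0) - f_star) / (t + 1)
print(f"min squared gradient norm: {min(grad_sq):.3e}  <=  bound: {bound:.3e}")
```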

You may be wondering about this result and whether we can improve it in some ways. In general $-$ we cannot. In particular, if we are only accessing the function's gradient, then this is the best we can do in terms of the number of iterations to output a vector $\mathbf{w}$ with $\|\nabla f(\mathbf{w})\|_2 \leq \epsilon$ in the worst case. We cannot say whether such a vector would be an approximate local minimum or an approximate saddle point (neither a local minimum nor a local maximum), but we know it cannot be an approximate local maximum (unless we started exactly at a local maximum), as we have a descent method: each step reduces the function value and to get to a local max, we would need to increase it ("climb the hill"). If the function is twice (Lipschitz continuously) differentiable, then we can compute its Hessian and check whether our solution looks like an approximate local minimum (based on the eigenvalues of the Hessian) and potentially keep moving if it is not an approximate local minimum (think about going down the side of the saddle we saw in one of the illustrations earlier in the lecture). It is possible to analyze such an approach, but it goes beyond what we cover in this class.

Finally, you may be wondering whether we can get a guarantee on the gradient of the last iterate $\mathbf{w}_t$ as opposed to the "best" one (the one with the smallest gradient). The answer is unfortunately "no," and this can be seen even in one-dimensional examples. Consider the example illustrated in the Python code snippet below. We have a nonconvex function, which looks mostly concave in the region we consider. The iterates of gradient descent are indicated by the red triangles, with increasing iteration count corresponding to the triangles going from left to right. In this one-dimensional example, the magnitude of the gradient (the one-dimensional derivative) corresponds to how steep the slope of the function is (see also the right plot, which shows the magnitude of the gradient). At the left-most triangle, where we initialized the algorithm, the function is mostly flat, meaning that the slope (and thus the gradient magnitude) is small, as can also be seen in the right plot. As we move from left to right, the function becomes steeper, meaning that the magnitude of the gradient increases. Thus the smallest gradient that we see among the iterates of gradient descent in this case is the initial one, and we conclude that we cannot in general claim that the magnitude of the last gradient will decrease (at any rate, or at all).

5.5.2 Reducing the Optimality Gap for Convex Functions

We now discuss how, once we assume that the function is in addition convex, we can obtain results on reducing the optimality gap $f(\mathbf{w}) - f_*.$ Recall that for convex functions a stationary point (a point with zero gradient) must also be a minimizer (in fact, this is an "if and only if" relationship).

To carry out the analysis, we will assume that there exists a minimizer $\mathbf{w}_*$ of $f$ on $\mathbb{R}^d.$ The key idea is to observe how the distance of the iterates $\mathbf{w}_t$ to $\mathbf{w}_*$ (squared) changes over the iterations. Before we delve into the analysis, we recall how to "expand" the square for vectors. Let $\mathbf{a}$ and $\mathbf{b}$ be some vectors in $\mathbb{R}^d.$ Then

$$ \frac{1}{2}\|\mathbf{a} - \mathbf{b}\|_2^2 = \frac{1}{2}\|\mathbf{a}\|_2^2 - \mathbf{a}^\top \mathbf{b} + \frac{1}{2}\|\mathbf{b}\|_2^2. $$

If it is not clear where this comes from, then use the definitions of the Euclidean norm $\frac{1}{2}\|\mathbf{a} - \mathbf{b}\|_2^2 = \frac{1}{2}\sum_{i=1}^d (a_i - b_i)^2$ and the inner product $\mathbf{a}^\top \mathbf{b} = \sum_{i=1}^d a_i b_i$ and expand the square coordinate-wise, for scalars $(a_i - b_i)^2.$

We will analyze gradient descent with constant step size $\eta_t = \eta \in (0, 1/L].$ Recall that in the previous section we showed that in this case we must have the sufficient decrease property:

$$ f(\mathbf{w}_{t+1}) - f(\mathbf{w}_t) \leq - \frac{\eta}{2}\|\nabla f(\mathbf{w}_t)\|_2^2. \;\;(*) $$

Compared to the previous section, we now in addition have that $f$ is convex. What this property crucially allows us to do is to bound below the minimum function value $f(\mathbf{w}_*),$ which in turn enables us to bound the optimality gap, as we will see in a bit. In particular, the convexity of $f$ allows us to say that for any $\mathbf{w}_t$, we have

$$ f(\mathbf{w}_*) \geq f(\mathbf{w}_t) + \nabla f(\mathbf{w}_t)^\top(\mathbf{w}_* - \mathbf{w}_t). \;\;(**) $$

Properties ($*$) and ($**$) are the main things we know about our problem, so in what follows we will crucially use them both to analyze gradient descent.

We begin the analysis by expanding the square $\frac{1}{2}\|\mathbf{w}_{t+1} - \mathbf{w}_*\|_2^2 = \frac{1}{2}\|\mathbf{w}_t - \mathbf{w}_* - \eta \nabla f(\mathbf{w}_t)\|_2^2,$ where we recall $\mathbf{w}_{t+1} = \mathbf{w}_t - \eta \nabla f(\mathbf{w}_t)$ by the gradient descent update. In particular, we have

$$ \frac{1}{2}\|\mathbf{w}_{t+1} - \mathbf{w}_*\|_2^2 = \frac{1}{2}\|\mathbf{w}_t - \mathbf{w}_*\|_2^2 - \eta \nabla f(\mathbf{w}_t)^\top (\mathbf{w}_t - \mathbf{w}_*) + \frac{\eta^2}{2}\|\nabla f(\mathbf{w}_t)\|_2^2. $$

We now use ($*$) and ($**$) to bound above the second and third terms in the above equality. Using ($**$), we have that

$$ - \eta \nabla f(\mathbf{w}_t)^\top (\mathbf{w}_t - \mathbf{w}_*) = \eta \nabla f(\mathbf{w}_t)^\top (\mathbf{w}_* - \mathbf{w}_t) \leq \eta(f(\mathbf{w}_*) - f(\mathbf{w}_t)). $$

On the other hand, a rearrangement of ($*$) gives:

$$ \frac{\eta^2}{2}\|\nabla f(\mathbf{w}_t)\|_2^2 \leq \eta (f(\mathbf{w}_t) - f(\mathbf{w}_{t+1})). $$

Thus, we get that

$$ \frac{1}{2}\|\mathbf{w}_{t+1} - \mathbf{w}_*\|_2^2 \leq \frac{1}{2}\|\mathbf{w}_t - \mathbf{w}_*\|_2^2 + \eta(f(\mathbf{w}_*) - f(\mathbf{w}_t)) + \eta (f(\mathbf{w}_t) - f(\mathbf{w}_{t+1})) = \frac{1}{2}\|\mathbf{w}_t - \mathbf{w}_*\|_2^2 + \eta(f(\mathbf{w}_*) - f(\mathbf{w}_{t+1})). $$

A rearrangement of the last inequality now gives:

$$ f(\mathbf{w}_{t+1}) - f(\mathbf{w}_*) \leq \frac{1}{2\eta}\|\mathbf{w}_t - \mathbf{w}_*\|_2^2 - \frac{1}{2\eta}\|\mathbf{w}_{t+1} - \mathbf{w}_*\|_2^2. $$

If we sum the above inequality for $t = 0, 1, 2, \dots, T$, we get a telescoping sum on the right-hand side, which leads to

$$ \sum_{t=0}^T (f(\mathbf{w}_{t+1}) - f(\mathbf{w}_*)) \leq \frac{1}{2\eta}\|\mathbf{w}_0 - \mathbf{w}_*\|_2^2 - \frac{1}{2\eta}\|\mathbf{w}_{T+1} - \mathbf{w}_*\|_2^2 \leq \frac{1}{2\eta}\|\mathbf{w}_0 - \mathbf{w}_*\|_2^2, $$

as $\|\mathbf{w}_{T+1} - \mathbf{w}_*\|_2^2 \geq 0.$ On the other hand, gradient descent is a descent method: it reduces the value of the function in each iteration; we also see this from the sufficient descent property ($*$). This means that $f(\mathbf{w}_{T+1}) \leq f(\mathbf{w}_T) \leq \dots \leq f(\mathbf{w}_0)$ and so it must also be that $\sum_{t=0}^T (f(\mathbf{w}_{t+1}) - f(\mathbf{w}_*)) \geq (T+1) (f(\mathbf{w}_{T+1}) - f(\mathbf{w}_*)).$ Combining with the above inequality this leads to

$$ (T+1) (f(\mathbf{w}_{T+1}) - f(\mathbf{w}_*)) \leq \frac{1}{2\eta}\|\mathbf{w}_0 - \mathbf{w}_*\|_2^2, $$

or, equivalently,

$$ f(\mathbf{w}_{T+1}) - f(\mathbf{w}_*) \leq \frac{1}{2\eta (T+1)}\|\mathbf{w}_0 - \mathbf{w}_*\|_2^2. $$

Observe that $\|\mathbf{w}_0 - \mathbf{w}_*\|_2^2$ is a fixed quantity $-$ it only depends on the initialization and intrinsic properties of the problem, and so it does not change with the iterations. In other words, we can view $\frac{1}{2\eta}\|\mathbf{w}_0 - \mathbf{w}_*\|_2^2$ as some fixed number/constant, which further means that $\frac{1}{2\eta (T+1)}\|\mathbf{w}_0 - \mathbf{w}_*\|_2^2$ (and, as a consequence, the optimality gap $f(\mathbf{w}_{T+1}) - f(\mathbf{w}_*)$) reduces at rate $1/(T+1)$ as we iterate the algorithm updates. As a result, for any target error $\epsilon > 0,$ we conclude that for the iterates $\mathbf{w}_t$ of gradient descent we have $f(\mathbf{w}_t) - f(\mathbf{w}_*) \leq \epsilon$ after at most $t = \lceil \frac{1}{2\eta \epsilon}\|\mathbf{w}_0 - \mathbf{w}_*\|_2^2\rceil$ iterations.
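The sketch below checks this bound numerically on an illustrative convex quadratic (an assumption made for the sketch, not a problem from the lecture):

```python
import numpy as np

# Numerical check of the 1/(T+1) optimality-gap bound on an illustrative
# convex quadratic f(w) = 0.5 * ||A w - b||_2^2.
A = np.array([[3.0, 1.0], [1.0, 2.0]])
b = np.array([1.0, -1.0])
f = lambda w: 0.5 * np.sum((A @ w - b) ** 2)
grad_f = lambda w: A.T @ (A @ w - b)

L = np.linalg.eigvalsh(A.T @ A).max()    # smoothness constant
eta = 1.0 / L
w_star = np.linalg.solve(A, b)           # minimizer of this quadratic
w0 = np.zeros(2)

T = 200
w = w0.copy()
for _ in range(T + 1):                   # produce the iterate w_{T+1}
    w = w - eta * grad_f(w)

gap = f(w) - f(w_star)
bound = np.sum((w0 - w_star) ** 2) / (2 * eta * (T + 1))
print(f"optimality gap: {gap:.3e}  <=  bound: {bound:.3e}")
```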

5.6 Stochastic Gradient Method

In learning problems, the objective function is typically expressed as an expectation: we want to solve problems of the form

$$ \min_{\mathbf{w} \in \mathbb{R}^d} f(\mathbf{w}) $$

where $f(\mathbf{w}) = \mathbb{E}_{(\mathbf{x}, y)\sim \mathcal{D}}[\ell(\mathbf{w}; \mathbf{x}, y)]$, $(\mathbf{x}, y)$ are labeled examples drawn from some (typically unknown) probability distribution $\mathcal{D}$, and $\ell$ is a loss function that evaluates how well the model determined by the weight vector $\mathbf{w}$ fits the data $(\mathbf{x}, y).$

Because the objective function is an expectation, its gradient is also an expectation:

$$ \nabla f(\mathbf{w}) = \mathbb{E}_{(\mathbf{x}, y)\sim \mathcal{D}}[\nabla \ell(\mathbf{w}; \mathbf{x}, y)]. $$

It is normally unrealistic to assume that we can compute this gradient exactly and use it in our algorithm. Instead, we need to work with gradient estimates. For the learning setting we just discussed, a reasonable estimate would be the gradient of the loss evaluated at a randomly drawn example $(\mathbf{x}, y)$: $\nabla \ell(\mathbf{w}; \mathbf{x}, y)$, where we assume that we draw samples from $\mathcal{D}$ i.i.d.

We will make a fairly general assumption here: instead of exact gradients $\nabla f(\mathbf{w})$, we have access to unbiased gradient estimates $\mathbf{g}_{\mathbf{w}}$, meaning that $\mathbf{g}_{\mathbf{w}}$ is a random vector dependent on $\mathbf{w}$ such that

$$ \mathbb{E}[\mathbf{g}_{\mathbf{w}}] = \nabla f(\mathbf{w}), $$

where the expectation above should be interpreted as conditioned on $\mathbf{w}$: even though $\mathbf{w}$ can be random, we assume that there is an independent source of randomness in $\mathbf{g}_{\mathbf{w}}$ that makes it a random variable that is an unbiased estimate of the gradient. (Think of the learning examples provided at the beginning of this section, where we can take $\mathbf{g}_{\mathbf{w}} = \nabla \ell(\mathbf{w}; \mathbf{x}, y)$ for $(\mathbf{x}, y)$ drawn from $\mathcal{D}$).

This assumption alone is not sufficient for being able to analyze (any) algorithm or even determine if some vector $\mathbf{w}_*$ is approximately optimal (e.g., has small gradient norm or small optimality gap) with finitely many samples. We need, in addition, something that controls how $\mathbf{g}_{\mathbf{w}}$ varies around its mean. For the purpose of this lecture, we will take the norm squared of $\mathbf{g}_{\mathbf{w}}$ to be bounded by some $G^2>0$ in expectation, in the sense that

$$ \mathbb{E}[\|\mathbf{g}_{\mathbf{w}}\|_2^2] \leq G^2, \; \; \forall \mathbf{w} \in \mathbb{R}^d. $$

It is possible to replace this assumption by weaker ones (such as bounded variance, or variance that depends on the distance to the set of optima, etc., though we would then need additional assumptions about $f$), but this does not significantly influence the analysis of the stochastic gradient method, so we stick with this assumption to keep things simple.

The stochastic gradient method looks very similar to gradient descent. It starts from some $\mathbf{w}_0 \in \mathbb{R}^d$ and updates its iterates for $t \geq 0$ as

$$ \mathbf{w}_{t+1} = \mathbf{w}_t - \eta_t \mathbf{g}_t, $$

where we use $\mathbf{g}_t$ to compactly denote $\mathbf{g}_{\mathbf{w}_t}$ (and avoid double subscript), as the context is clear.
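As a minimal sketch, the method can be implemented as follows, assuming a synthetic least-squares problem in which each stochastic gradient comes from one freshly drawn example (the data model is an assumption made only for this sketch); it also tracks the averaged iterate that appears in the analysis below:

```python
import numpy as np

# A minimal sketch of the stochastic gradient method on an illustrative
# least-squares objective f(w) = E[(x^T w - y)^2]; the data model below
# (x ~ N(0, I), y = x^T w_true + noise) is an assumption made for this sketch.
rng = np.random.default_rng(0)
d = 5
w_true = rng.standard_normal(d)

def stochastic_gradient(w):
    """Unbiased gradient estimate g_w from a single freshly drawn example (x, y)."""
    x = rng.standard_normal(d)
    y = x @ w_true + 0.1 * rng.standard_normal()
    return 2.0 * (x @ w - y) * x           # gradient of (x^T w - y)^2 at w

T = 10_000
eta = 1.0 / np.sqrt(T + 1)                 # constant step size tied to the horizon
w = np.zeros(d)
w_avg = np.zeros(d)
for _ in range(T + 1):
    w_avg += w / (T + 1)                   # running average of w_0, ..., w_T
    w = w - eta * stochastic_gradient(w)

print("distance of averaged iterate to w_true:", np.linalg.norm(w_avg - w_true))
```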

We will analyze this method not assuming anything about $f$ except that it is convex. In this case, we cannot establish the sufficient descent property and, unlike gradient descent, this method is not guaranteed to decrease the function value even if we had $\mathbf{g}_t = \nabla f(\mathbf{w}_t)$ (i.e., no randomness). As a consequence, step sizes $\eta_t$ need to be kept quite small and, as we will see shortly, the method converges more slowly than what we established for gradient descent earlier in this lecture. To keep things simple, we will consider constant step sizes $\eta_t = \eta,$ which, as we will soon see, will need to depend on the number of steps for which we plan to run the algorithm.

We begin the analysis in the same way as we did for gradient descent: by expanding the square $\frac{1}{2}\|\mathbf{w}_{t+1} - \mathbf{w}_*\|_2^2$, where $\mathbf{w}_*$ is a minimizer of $f$. In particular, we have

$$ \frac{1}{2}\|\mathbf{w}_{t+1} - \mathbf{w}_*\|_2^2 = \frac{1}{2}\|\mathbf{w}_{t} - \mathbf{w}_*\|_2^2 - \eta \mathbf{g}_t^\top (\mathbf{w}_t - \mathbf{w}_*) + \frac{\eta^2}{2}\|\mathbf{g}_t\|_2^2. $$

We now take the expectation on both sides, where we condition on whatever randomness determines $\mathbf{w}_t$ and denote that expectation by $\mathbb{E}_t$ so that when we condition and take the expectation, we can treat $\mathbf{w}_t$ as being fixed/deterministic. By our assumptions about $\mathbf{g}_t,$ this means that $\mathbb{E}_t[\mathbf{g}_t^\top (\mathbf{w}_t - \mathbf{w}_*)] = \nabla f(\mathbf{w}_t)^\top (\mathbf{w}_t - \mathbf{w}_*)$ and $\mathbb{E}_t[\|\mathbf{g}_t\|_2^2] \leq G^2.$ As a result, we have that

$$ \mathbb{E}_t\Big[\frac{1}{2}\|\mathbf{w}_{t+1} - \mathbf{w}_*\|_2^2\Big] \leq \mathbb{E}_t\Big[\frac{1}{2}\|\mathbf{w}_{t} - \mathbf{w}_*\|_2^2\Big] - \eta \nabla f(\mathbf{w}_t)^\top (\mathbf{w}_t - \mathbf{w}_*) + \frac{\eta^2}{2}G^2. $$

By convexity of $f$ (same as what we used in the analysis of gradient descent), we have that $- \eta \nabla f(\mathbf{w}_t)^\top (\mathbf{w}_t - \mathbf{w}_*) \leq \eta(f(\mathbf{w}_*) - f(\mathbf{w}_t)).$ Plugging this into the last inequality and rearranging, we get that

$$ \eta(f(\mathbf{w}_t) - f(\mathbf{w}_*)) \leq \mathbb{E}_t\Big[\frac{1}{2}\|\mathbf{w}_{t} - \mathbf{w}_*\|_2^2\Big] - \mathbb{E}_t\Big[\frac{1}{2}\|\mathbf{w}_{t+1} - \mathbf{w}_*\|_2^2\Big] + \frac{\eta^2}{2}G^2. $$

Now, if we directly sum over $t = 0, 1, \dots, T$ as we did in the case of gradient descent, we would not be able to telescope the "squared distance to optimum" terms as we did before, because the conditioning in $\mathbb{E}_t$ depends on $t$. This is resolved by taking the expectation with respect to all randomness in the algorithm on both sides, which gives

$$ \eta\mathbb{E}[f(\mathbf{w}_t) - f(\mathbf{w}_*)] \leq \mathbb{E}\Big[\frac{1}{2}\|\mathbf{w}_{t} - \mathbf{w}_*\|_2^2\Big] - \mathbb{E}\Big[\frac{1}{2}\|\mathbf{w}_{t+1} - \mathbf{w}_*\|_2^2\Big] + \frac{\eta^2}{2}G^2. $$

Now when we sum over $t = 0, 1, \dots, T$, we get the desired telescoping, leading to

$$ \eta\sum_{t=0}^T \mathbb{E}[f(\mathbf{w}_t) - f(\mathbf{w}_*)] \leq \mathbb{E}\Big[\frac{1}{2}\|\mathbf{w}_{0} - \mathbf{w}_*\|_2^2\Big] - \mathbb{E}\Big[\frac{1}{2}\|\mathbf{w}_{T+1} - \mathbf{w}_*\|_2^2\Big] + (T+1)\frac{\eta^2}{2}G^2 \leq \frac{1}{2}\|\mathbf{w}_{0} - \mathbf{w}_*\|_2^2 + (T+1)\frac{\eta^2}{2}G^2. $$

Dividing both sides by $\eta(T+1)$ leads to

$$ \frac{1}{T+1}\sum_{t=0}^T \mathbb{E}[f(\mathbf{w}_t) - f(\mathbf{w}_*)]\leq \frac{\|\mathbf{w}_{0} - \mathbf{w}_*\|_2^2}{2\eta(T+1)} + \frac{\eta G^2}{2}. $$

Let $\overline{\mathbf{w}}_T = \frac{1}{T+1}\sum_{t=0}^T \mathbf{w}_t.$ Convexity then implies that $f(\overline{\mathbf{w}}_T) \leq \frac{1}{T+1}\sum_{t=0}^T f(\mathbf{w}_t)$ and so we have

$$ \mathbb{E}[f(\overline{\mathbf{w}}_T) - f(\mathbf{w}_*)] \leq \frac{\|\mathbf{w}_{0} - \mathbf{w}_*\|_2^2}{2\eta(T+1)} + \frac{\eta G^2}{2} $$

and it remains to choose $\eta.$ A reasonable choice would be the $\eta$ that minimizes the right-hand side of the last inequality (so that the optimality gap is as small as possible). Setting the derivative of the right-hand side with respect to $\eta$ to zero gives $\eta = \frac{\|\mathbf{w}_{0} - \mathbf{w}_*\|_2}{G\sqrt{T+1}},$ leading to

$$ \mathbb{E}[f(\overline{\mathbf{w}}_T) - f(\mathbf{w}_*)] \leq \frac{G\|\mathbf{w}_{0} - \mathbf{w}_*\|_2}{\sqrt{T+1}}. $$

This bound is unimprovable: under our problem assumptions, no method can achieve a better bound in the worst case (up to absolute constants). However, this choice of $\eta$ is unrealistic, as we do not normally know the value of $\|\mathbf{w}_{0} - \mathbf{w}_*\|_2$ or even $G.$ If instead we simply choose $\eta = \frac{1}{\sqrt{T+1}},$ we get a slightly worse bound, but one that still goes down as $1/\sqrt{T+1}:$

$$ \mathbb{E}[f(\overline{\mathbf{w}}_T) - f(\mathbf{w}_*)] \leq \frac{\|\mathbf{w}_{0} - \mathbf{w}_*\|_2^2 + G^2}{2\sqrt{T+1}}. $$

Put differently, we get that for any $\epsilon > 0,$ we have $ \mathbb{E}[f(\overline{\mathbf{w}}_T) - f(\mathbf{w}_*)] \leq \epsilon$ after at most $T = \Big\lceil \Big(\frac{\|\mathbf{w}_{0} - \mathbf{w}_*\|_2^2 + G^2}{2\epsilon}\Big)^2 \Big\rceil $ iterations.

Compare this result to what we had for gradient descent. First of all, we have a stochastic method, so the guarantee we get holds for the expected optimality gap (whereas for gradient descent we had a guarantee on the deterministic optimality gap, without the expectation). Further, the dependence on the error parameter $\epsilon$ is increased from $1/\epsilon$ to $1/\epsilon^2,$ which is the price we need to pay in general for working with stochastic estimates of the gradient. Finally, the step size $\eta$ depends on the number of iterations we take, meaning that we need to decide on the number of iterations we want to take in advance. This last requirement is possible to relax by choosing step sizes as $\eta_t = \frac{1}{\sqrt{t+1}}$ and carrying out a similar analysis, which leads to an additional logarithmic factor in the final bound on the number of iterations.

Bibliographic Notes

This lecture is based solely on my experience in teaching CS 726 and materials developed therein. The Python examples were created with the help of ChatGPT and edited to suit this lecture.